rm(list=ls())
library(tidyverse)
library(haven)

# Read Election Data and do some corrections of identified mistakes and tie elimination ------------
mayor_elections <- read_rds(here::here("data","processed","elections","mayor_elections_2000_2016.rds"))

# Exclude 2016 
mayor_elections <- mayor_elections %>% 
  filter(ano_eleicao != 2016)

# Check exclusion
mayor_elections %>% distinct(ano_eleicao)

# In checks below we identified that these cases are not in the dataset
# The winner of GRACCHO CARDOSO and LARANJEIRAS in 2004 is not in the dataset. Hand coded based on csv file at raw data folder 
# These csv files were downloaded from (accessed on July 08 2020)
# http://www.tse.jus.br/eleicoes/eleicoes-anteriores/eleicoes-2004/candidaturas-votacao-e-resultados/resultado-da-eleicao-2004
mayor_elections <- mayor_elections %>%
  add_row(
    data_geracao = NA,
    hora_geracao = NA,
    ano_eleicao = 2004,
    num_turno = 1,
    descricao_eleicao = "ELEICOES 2004",
    sigla_uf = "SE",
    sigla_ue = "31712",
    codigo_municipio = "31712",
    nome_municipio = "LARANJEIRAS",
    numero_zona = 13,
    codigo_cargo = 11,
    numero_cand = 25,
    sq_candidato = NA,
    nome_candidato = "PAULO HAGENBECK",
    nome_urna_candidato = NA,
    descricao_cargo = "PREFEITO",
    cod_sit_cand_superior = NA,
    desc_sit_cand_superior = "APTO",
    codigo_sit_candidato = NA,
    desc_sit_candidato = "DEFERIDO",
    codigo_sit_cand_tot = NA,
    desc_sit_cand_tot = "ELEITO",
    numero_partido = 25,
    sigla_partido = "PFL",
    nome_partido = NA,
    sequencial_legenda = NA,
    nome_coligacao = NA,
    composicao_legenda = "#NE#",
    total_votos = 6609,
    irregular = 0
  ) %>%
  add_row(
    data_geracao = NA,
    hora_geracao = NA,
    ano_eleicao = 2004,
    num_turno = 1,
    descricao_eleicao = "ELEICOES 2004",
    sigla_uf = "SE",
    sigla_ue = "31712",
    codigo_municipio = "31712",
    nome_municipio = "LARANJEIRAS",
    numero_zona = 13,
    codigo_cargo = 11,
    numero_cand = 14,
    sq_candidato = NA,
    nome_candidato = "MARIA IONE MACEDO SOBRAL",
    nome_urna_candidato = NA,
    descricao_cargo = "PREFEITO",
    cod_sit_cand_superior = NA,
    desc_sit_cand_superior = "APTO",
    codigo_sit_candidato = NA,
    desc_sit_candidato = "DEFERIDO",
    codigo_sit_cand_tot = NA,
    desc_sit_cand_tot = "NAO ELEITO",
    numero_partido = 14,
    sigla_partido = "PTB",
    nome_partido = NA,
    sequencial_legenda = NA,
    nome_coligacao = NA,
    composicao_legenda = "#NE#",
    total_votos = 5530,
    irregular = 0
  ) %>%
  add_row(
    data_geracao = NA,
    hora_geracao = NA,
    ano_eleicao = 2004,
    num_turno = 1,
    descricao_eleicao = "ELEICOES 2004",
    sigla_uf = "SE",
    sigla_ue = "31518",
    codigo_municipio = "31518",
    nome_municipio = "GRACCHO CARDOSO",
    numero_zona = 3,
    codigo_cargo = 11,
    numero_cand = 11,
    sq_candidato = NA,
    nome_candidato = "JOSE EUNAPIO DOS SANTOS",
    nome_urna_candidato = NA,
    descricao_cargo = "PREFEITO",
    cod_sit_cand_superior = NA,
    desc_sit_cand_superior = "APTO",
    codigo_sit_candidato = NA,
    desc_sit_candidato = "DEFERIDO",
    codigo_sit_cand_tot = NA,
    desc_sit_cand_tot = "ELEITO",
    numero_partido = 11,
    sigla_partido = "PP",
    nome_partido = NA,
    sequencial_legenda = NA,
    nome_coligacao = NA,
    composicao_legenda = "#NE#",
    total_votos = 2065, 
    irregular = 0
  )

# Check candidacy situation variables
mayor_elections %>%
  group_by(desc_sit_cand_tot) %>%
  summarise(n_obs = n())

mayor_elections %>%
  group_by(desc_sit_candidato) %>%
  summarise(n_obs = n())

# Keep only desc_sit_cand_tot == ELEITO or NAO ELEITO

mayor_elections_cleaned <- mayor_elections %>%
  mutate(desc_sit_cand_tot = ifelse(desc_sit_cand_tot == "ELEITO POR QUOCIENTE PARTIDARIO",
                                    "ELEITO",
                                    desc_sit_cand_tot)) %>% 
  filter(desc_sit_cand_tot %in% c("ELEITO","NAO ELEITO"))

mayor_elections_cleaned <- mayor_elections_cleaned %>%
  filter((desc_sit_candidato %in% c("DEFERIDO","DEFERIDO COM RECURSO",
                                   "SUB JUDICE", "SUBSTITUTO PENDENTE DE JULGAMENTO",
                                   "SUBSTITUTO MAJORITARIO PENDENTE DE JULGAMENTO") |
            is.na(desc_sit_candidato)))

# Check if filters workerd
mayor_elections_cleaned %>% distinct(desc_sit_cand_tot)
mayor_elections_cleaned %>% distinct(desc_sit_candidato)

# Compute vote shares of elections that went to 2nd round -------
mayor_2nd <- mayor_elections_cleaned %>% 
  filter(num_turno == 2)

# compute vote shares
vote_shares_2nd <- mayor_2nd %>%  
  group_by(ano_eleicao,sigla_uf,codigo_municipio,nome_municipio,num_turno,sigla_partido,nome_candidato,sq_candidato,desc_sit_candidato,desc_sit_cand_tot,composicao_legenda,irregular,descricao_eleicao) %>% 
  summarise(total_votes_cand = sum(total_votos,na.rm = T)) %>% 
  group_by(ano_eleicao,sigla_uf,codigo_municipio,nome_municipio,num_turno) %>% 
  mutate(total_votes_mun = sum(total_votes_cand, na.rm = T)) %>% 
  ungroup() %>% 
  mutate(vote_share = total_votes_cand/total_votes_mun*100) %>% 
  group_by(ano_eleicao,codigo_municipio) %>% 
  mutate(rank = row_number(desc(vote_share)))  %>% 
  ungroup() 

# Check if rank == 2 is always the non-elected candidate
vote_shares_2nd %>% 
  filter(rank == 2) %>% 
  distinct(desc_sit_cand_tot) # OK 

# Check if rank == 1 is always the elected candidate
vote_shares_2nd %>% 
  filter(rank == 1) %>% 
  distinct(desc_sit_cand_tot) # OK PASS TEST

# Compute vote shares of elections decided on the 1st round -------

mayor_1st <- mayor_elections_cleaned %>% 
  filter(num_turno == 1)

# Eliminate elections that went for a runoff
mayor_1st <- mayor_1st %>% 
  anti_join(vote_shares_2nd, by = c("ano_eleicao","codigo_municipio","sigla_uf","descricao_eleicao","irregular"))

#These ties were identified in the checks below and we add 1 vote to the elected to eliminate the tie
mayor_1st <- mayor_1st %>%
  mutate(total_votos = ifelse(codigo_municipio %in% c("81698","73415","85790") & ano_eleicao == 2000 & desc_sit_cand_tot == "ELEITO",total_votos+1,total_votos),
         total_votos = ifelse(codigo_municipio %in% c("61182","81191","08486") & ano_eleicao == 2004 & desc_sit_cand_tot == "ELEITO",total_votos+1,total_votos),
         total_votos = ifelse(codigo_municipio %in% c("34215","44490") & ano_eleicao == 2008 & desc_sit_cand_tot == "ELEITO",total_votos+1,total_votos),
         total_votos = ifelse(codigo_municipio %in% c("74438","61972") & ano_eleicao == 2012 & desc_sit_cand_tot == "ELEITO",total_votos+1,total_votos))

# compute vote shares
vote_shares_1st <- mayor_1st %>% 
  group_by(ano_eleicao,sigla_uf,codigo_municipio,nome_municipio,num_turno,sigla_partido,nome_candidato,sq_candidato,desc_sit_candidato,desc_sit_cand_tot,composicao_legenda,irregular,descricao_eleicao) %>% 
  summarise(total_votes_cand = sum(total_votos,na.rm = T)) %>% 
  group_by(ano_eleicao,sigla_uf,codigo_municipio,nome_municipio,num_turno) %>% 
  mutate(total_votes_mun = sum(total_votes_cand, na.rm = T)) %>% 
  ungroup() %>% 
  mutate(vote_share = total_votes_cand/total_votes_mun*100) %>% 
  group_by(ano_eleicao,codigo_municipio) %>% 
  mutate(rank = row_number(desc(vote_share)))  %>% 
  ungroup()

# Check if rank == 2 is always the non-elected candidate
vote_shares_1st %>% 
  filter(rank == 2) %>% 
  distinct(desc_sit_cand_tot) # OK 

# Check if rank == 1 is always elected
vote_shares_1st %>% 
  filter(rank == 1) %>% 
  distinct(desc_sit_cand_tot) # Do not pass test

# Check wrong cases above
wrong_cases <- vote_shares_1st %>% 
  filter(rank == 1, irregular == 0, desc_sit_cand_tot == "NAO ELEITO") 

# Comment: in the 7 wrong cases above, there is a problem in the candidacy registration of the winner
# We turn the vote shares of all candidates in that election NA
wrong_cases <- wrong_cases %>% 
  mutate(wrong = "1") %>% 
  select(codigo_municipio,ano_eleicao,descricao_eleicao,wrong)

vote_shares_1st <- vote_shares_1st %>% 
  left_join(wrong_cases, by = c("codigo_municipio","ano_eleicao","descricao_eleicao")) 

vote_shares_1st %>% filter(wrong == "1") %>% distinct(codigo_municipio,ano_eleicao,descricao_eleicao,wrong)

vote_shares_1st %>% filter(is.na(vote_share))

vote_shares_1st <- vote_shares_1st %>% 
  mutate(vote_share = ifelse(wrong %in% c("1"), NA, vote_share))

vote_shares_1st %>% filter(is.na(vote_share)) %>% distinct(codigo_municipio,ano_eleicao,descricao_eleicao)

# Check if update worked
check_wrong_cases_cities <- vote_shares_1st %>% 
  filter(is.na(vote_share)) %>% 
  distinct(codigo_municipio,ano_eleicao,descricao_eleicao)

setdiff(check_wrong_cases_cities,select(wrong_cases,-wrong)) #OK pass test


# Put two rounds together
vote_shares <- bind_rows(vote_shares_1st,vote_shares_2nd)

# Create a check to see if there is any tie not in the first place! (like 2nd and 3rd)

vote_shares %>% 
  group_by(ano_eleicao,codigo_municipio,vote_share,num_turno) %>% 
  summarise(n_obs = n()) %>% 
  filter(n_obs>1,!near(vote_share,0))

# Comment: There are 5 ties in not between first and second place. 
# These ties don`t affect the computation of the margin of victory.

# Check if we have the party showing up at most 1 time per year election
vote_shares %>% 
  group_by(ano_eleicao,codigo_municipio,sigla_partido,irregular) %>% 
  summarise(n_obs = n()) %>% 
  filter(n_obs>1)  # OK Pass test

# Create dummy left -------------------
party_classification <- readxl::read_excel(here::here("data","raw","party_classification.xls"))

# Check left
left_parties <- party_classification %>% filter(left_dummy == 1)

vote_shares <- vote_shares %>% 
  left_join(party_classification, by = c("sigla_partido"="party")) %>% 
  rename(mayor_left = left_dummy)

# Check party classifictions
vote_shares %>% rename(left_dummy = mayor_left) %>%
  distinct(left_dummy,sigla_partido) %>% 
  rename(party = sigla_partido) %>% 
  setdiff(party_classification) %>% 
  nrow() # OK Pass test (all parties classified according to xsl file)

# Create runoff indicator
vote_shares <- vote_shares %>% 
  mutate(runoff = ifelse(num_turno == 2,1,0)) 

# check runoff
vote_shares %>% distinct(runoff,num_turno)

vote_shares <- vote_shares %>% 
  select(-num_turno, -composicao_legenda)

# Read coalition data
mayor_coalitions_ideology <- read_rds(here::here("data","processed","elections","mayor_coalition_ideology.rds")) %>% 
  select(-nome_municipio)

vote_shares_coalition_ideology <- vote_shares %>% 
  left_join(mayor_coalitions_ideology, by = c("ano_eleicao","sigla_uf","codigo_municipio" = "sigla_ue","sigla_partido")) 


# Create Margin of Victory ----

# prepare data to be in wider format
margin_of_victories <- vote_shares_coalition_ideology %>% 
  select(ano_eleicao:nome_municipio,runoff,irregular,rank,nome_candidato,sq_candidato,sigla_partido,total_votes_cand:vote_share,mayor_left,composicao_coligacao:coalition_left_lagw) %>%
  gather(variable, value, -(ano_eleicao:rank)) %>% 
  unite(rank,rank,variable) %>% 
  spread(rank,value, sep = "_") %>%  
  mutate_at(vars(matches("rank"),-matches("sigla|coliga|nome")),
            .funs = funs(as.numeric(.))) 

# Create a variable that allows us to check the rank of the best opposing candidate 
margin_of_victories <- margin_of_victories %>% 
  mutate(rank_best_opposition = ifelse(rank_1_mayor_left != rank_2_mayor_left,
                                       2,
                                       ifelse(rank_1_mayor_left!=rank_3_mayor_left,
                                              3,
                                              ifelse(rank_1_mayor_left!=rank_4_mayor_left,
                                                     4,
                                                     ifelse(rank_1_mayor_left!=rank_5_mayor_left,
                                                            5,
                                                            ifelse(rank_1_mayor_left!=rank_6_mayor_left,
                                                                   6,
                                                                   ifelse(rank_1_mayor_left!=rank_7_mayor_left,
                                                                          7,
                                                                          ifelse(rank_1_mayor_left!=rank_8_mayor_left,
                                                                                 8,
                                                                                 ifelse(rank_1_mayor_left!=rank_9_mayor_left,
                                                                                        9,
                                                                                        ifelse(rank_1_mayor_left!=rank_10_mayor_left,
                                                                                               10,
                                                                                               ifelse(rank_1_mayor_left!=rank_11_mayor_left,
                                                                                                      11,
                                                                                                      ifelse(rank_1_mayor_left!=rank_12_mayor_left,
                                                                                                             12,
                                                                                                             13))))))))))))

# Compute Margin of victory
margin_of_victories <- margin_of_victories %>% 
  mutate(margin_mayor_left = ifelse(rank_1_mayor_left != rank_2_mayor_left,
                                    rank_1_vote_share - rank_2_vote_share,
                                    ifelse(rank_1_mayor_left!=rank_3_mayor_left,
                                           rank_1_vote_share - rank_3_vote_share,
                                           ifelse(rank_1_mayor_left!=rank_4_mayor_left,
                                                  rank_1_vote_share - rank_4_vote_share,
                                                  ifelse(rank_1_mayor_left!=rank_5_mayor_left,
                                                         rank_1_vote_share - rank_5_vote_share,
                                                         ifelse(rank_1_mayor_left!=rank_6_mayor_left,
                                                                rank_1_vote_share - rank_6_vote_share,
                                                                ifelse(rank_1_mayor_left!=rank_7_mayor_left,
                                                                       rank_1_vote_share - rank_7_vote_share,
                                                                       ifelse(rank_1_mayor_left!=rank_8_mayor_left,
                                                                              rank_1_vote_share - rank_8_vote_share,
                                                                              ifelse(rank_1_mayor_left!=rank_9_mayor_left,
                                                                                     rank_1_vote_share - rank_9_vote_share,
                                                                                     ifelse(rank_1_mayor_left!=rank_10_mayor_left,
                                                                                            rank_1_vote_share - rank_10_vote_share,
                                                                                            ifelse(rank_1_mayor_left!=rank_11_mayor_left,
                                                                                                   rank_1_vote_share - rank_11_vote_share,
                                                                                                   ifelse(rank_1_mayor_left!=rank_12_mayor_left,
                                                                                                          rank_1_vote_share - rank_12_vote_share,
                                                                                                          rank_1_vote_share - rank_13_vote_share))))))))))),
         margin_mayor_left = ifelse(rank_1_mayor_left==1,margin_mayor_left,(-1)*margin_mayor_left),
  )

# Compute ideo difference
margin_of_victories <- margin_of_victories %>% 
  mutate(coalition_ideo_dist = ifelse(rank_1_mayor_left != rank_2_mayor_left,
                                    rank_1_coalition_ideo - rank_2_coalition_ideo,
                                    ifelse(rank_1_mayor_left!=rank_3_mayor_left,
                                           rank_1_coalition_ideo - rank_3_coalition_ideo,
                                           ifelse(rank_1_mayor_left!=rank_4_mayor_left,
                                                  rank_1_coalition_ideo - rank_4_coalition_ideo,
                                                  ifelse(rank_1_mayor_left!=rank_5_mayor_left,
                                                         rank_1_coalition_ideo - rank_5_coalition_ideo,
                                                         ifelse(rank_1_mayor_left!=rank_6_mayor_left,
                                                                rank_1_coalition_ideo - rank_6_coalition_ideo,
                                                                ifelse(rank_1_mayor_left!=rank_7_mayor_left,
                                                                       rank_1_coalition_ideo - rank_7_coalition_ideo,
                                                                       ifelse(rank_1_mayor_left!=rank_8_mayor_left,
                                                                              rank_1_coalition_ideo - rank_8_coalition_ideo,
                                                                              ifelse(rank_1_mayor_left!=rank_9_mayor_left,
                                                                                     rank_1_coalition_ideo - rank_9_coalition_ideo,
                                                                                     ifelse(rank_1_mayor_left!=rank_10_mayor_left,
                                                                                            rank_1_coalition_ideo - rank_10_coalition_ideo,
                                                                                            ifelse(rank_1_mayor_left!=rank_11_mayor_left,
                                                                                                   rank_1_coalition_ideo - rank_11_coalition_ideo,
                                                                                                   ifelse(rank_1_mayor_left!=rank_12_mayor_left,
                                                                                                          rank_1_coalition_ideo - rank_12_coalition_ideo,
                                                                                                          rank_1_coalition_ideo - rank_13_coalition_ideo))))))))))),
         coalition_ideo_dist = ifelse(rank_1_mayor_left == 0, coalition_ideo_dist,(-1)*coalition_ideo_dist),
         coalition_ideo_dist_lagw = ifelse(rank_1_mayor_left != rank_2_mayor_left,
                                      rank_1_coalition_ideo_lagw - rank_2_coalition_ideo_lagw,
                                      ifelse(rank_1_mayor_left!=rank_3_mayor_left,
                                             rank_1_coalition_ideo_lagw - rank_3_coalition_ideo_lagw,
                                             ifelse(rank_1_mayor_left!=rank_4_mayor_left,
                                                    rank_1_coalition_ideo_lagw - rank_4_coalition_ideo_lagw,
                                                    ifelse(rank_1_mayor_left!=rank_5_mayor_left,
                                                           rank_1_coalition_ideo_lagw - rank_5_coalition_ideo_lagw,
                                                           ifelse(rank_1_mayor_left!=rank_6_mayor_left,
                                                                  rank_1_coalition_ideo_lagw - rank_6_coalition_ideo_lagw,
                                                                  ifelse(rank_1_mayor_left!=rank_7_mayor_left,
                                                                         rank_1_coalition_ideo_lagw - rank_7_coalition_ideo_lagw,
                                                                         ifelse(rank_1_mayor_left!=rank_8_mayor_left,
                                                                                rank_1_coalition_ideo_lagw - rank_8_coalition_ideo_lagw,
                                                                                ifelse(rank_1_mayor_left!=rank_9_mayor_left,
                                                                                       rank_1_coalition_ideo_lagw - rank_9_coalition_ideo_lagw,
                                                                                       ifelse(rank_1_mayor_left!=rank_10_mayor_left,
                                                                                              rank_1_coalition_ideo_lagw - rank_10_coalition_ideo_lagw,
                                                                                              ifelse(rank_1_mayor_left!=rank_11_mayor_left,
                                                                                                     rank_1_coalition_ideo_lagw - rank_11_coalition_ideo_lagw,
                                                                                                     ifelse(rank_1_mayor_left!=rank_12_mayor_left,
                                                                                                            rank_1_coalition_ideo_lagw - rank_12_coalition_ideo_lagw,
                                                                                                            rank_1_coalition_ideo_lagw - rank_13_coalition_ideo_lagw))))))))))),
         coalition_ideo_dist_lagw = ifelse(rank_1_mayor_left == 0, coalition_ideo_dist_lagw,(-1)*coalition_ideo_dist_lagw),
  )


# Max rank of most voted 
margin_of_victories %>% 
  mutate(max_rank = max(rank_best_opposition, na.rm = T)) %>% 
  distinct(max_rank)


# Check if all margin > 0 (margin < 0)  have mayor_left =1 (mayor_left =0)
margin_of_victories %>% 
  filter(margin_mayor_left>0) %>% 
  distinct(rank_1_mayor_left)

margin_of_victories %>% 
  filter(margin_mayor_left<0) %>% 
  distinct(rank_1_mayor_left)

# Insert IBGE Codes ---------
tse_to_ibge <- read_delim(here::here("data","raw","correspondencia_municipios_tse_ibge.csv"),
                          skip =1,
                          delim = ";",
                          col_names=c("uf","tse_name","tse_code","mun_code","mun_name"),
                          col_types = "ccccc",
                          locale = locale(encodin="latin1")) %>% 
  #select(mun_code,tse_code) %>% 
  mutate(mun_code = as.character(str_sub(mun_code,1,-2)),
         tse_code = as.numeric(tse_code))

# Check correspondence
margin_of_victories %>% 
  mutate(codigo_municipio = as.numeric(codigo_municipio)) %>%
  anti_join(tse_to_ibge, by = c("codigo_municipio" = "tse_code")) %>% 
  distinct(codigo_municipio)

# Join

margin_of_victories <- margin_of_victories %>%
  mutate(codigo_municipio_num = as.numeric(codigo_municipio)) %>%
  left_join(tse_to_ibge, by = c("codigo_municipio_num" = "tse_code")) %>%
  relocate(ano_eleicao,sigla_uf,mun_code,nome_municipio,runoff,margin_mayor_left,coalition_ideo_dist,coalition_ideo_dist_lagw,rank_best_opposition) %>%
  select(-codigo_municipio,-codigo_municipio_num,-uf,-tse_name,-mun_name)


# Prepare margin long 
margin_of_victories_long <- margin_of_victories %>% 
  rename(best_opposition = rank_best_opposition) %>% 
  mutate_at(vars(starts_with("rank_")),~(as.character(.))) %>% 
  select_at(vars(ano_eleicao,sigla_uf,mun_code,nome_municipio,irregular,ends_with("sigla_partido"),ends_with("nome_candidato"),ends_with("sq_candidato"),ends_with("mayor_left"),margin_mayor_left,best_opposition)) %>% 
  pivot_longer(cols = starts_with("rank"),values_to = "values", names_to = "variables") %>% 
  extract(variables, into = c("rank","var"), regex = "(rank_[0-9]+)_([a-z]+_[a-z]+)") %>% 
  mutate(rank = as.numeric(str_replace_all(rank,"rank_",""))) %>% 
  pivot_wider(names_from = "var", values_from = "values")

# Eliminate 
margin_of_victories_long <- margin_of_victories_long %>% 
  filter(!(is.na(sigla_partido) & is.na(nome_candidato)))

# SaveRDS -----

write_rds(margin_of_victories,here::here("data","processed","elections","margin_of_victories.rds"))

write_dta(margin_of_victories_long, here::here("data","processed","margin_of_victories_long.dta"))


